6: Graphics

R for faculty, Fall 2024

Jonas Moss

Graphics in R

There are three main options for doing graphics in R:

  • The “base” graphics in the graphics library (preinstalled).
  • The ggplot2 library, which is more flexible but harder to use.
  • The lattice library, which I am unfamiliar with.

We focus on base graphics.

How doe people learn graphics?

  1. Make some graphs to learn the basics.
  2. You want to do something special. How? StackExchange and ChatGPT, more or less.
  3. Or what you vaguely remember seeing.

Scatterplots

Goal

Getting started

plot(x = mtcars$mpg, 
     y = mtcars$disp)

plot(x = mtcars$mpg, 
     y = mtcars$disp, 
     xlab = "Miles per gallon", 
     ylab = "Displacement")

plot(x = mtcars$mpg, 
     y = mtcars$disp, 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles")

Modify colors

plot(x = mtcars$mpg, 
     y = mtcars$disp, 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = "red")

Change the color of the elements to cyan.

plot(x = mtcars$mpg, 
     y = mtcars$disp, 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = "cyan")

Modify the box

Remove the “box” around the points completely. (The “axes” with numbers will stay there.) Use ?par and look for the bty parameter.

plot(x = mtcars$mpg, 
     y = mtcars$disp, 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = "red",
     bty = "n")

Removing the axes

plot(x = mtcars$mpg, 
     y = mtcars$disp, 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = "black",
     axes = FALSE,
     bty = "n")

Remove all text from the plot, leaving only the points.

plot(x = mtcars$mpg, 
     y = mtcars$disp, 
     xlab = "", 
     ylab = "", 
     main = "",
     col = "red",
     axes = FALSE,
     bty = "n")

Modify the point style

plot(x = mtcars$mpg, 
     y = mtcars$disp, 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = "red",
     pch = 10,
     cex = 1.6)

plot(x = mtcars$mpg, 
     y = mtcars$disp, 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = "black",
     pch = 20,
     cex = 1.6)

Make a scatterplot, with the same data, using triangles. To do this, find the pch code for a triangle (google, e.g., pch r there are multiple!) and make a plot.

plot(x = mtcars$mpg, 
     y = mtcars$disp, 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = "black",
     pch = 17,
     cex = 1.6)

Color the important cars

colors <- rep("black", length(mtcars$mpg))
colors[c(14, 28)] <- "red"
plot(x = mtcars$mpg, 
     y = mtcars$disp, 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = colors,
     pch = 20,
     cex = 1.6)

Use rownames(mtcars) to color your three favorite cars in blue.

My favorite cars are Hornet 4 Drive and Hornet Sportabout. They have indices 4 and 5.

colors <- rep("black", length(mtcars$mpg))
colors[c(4, 5)] <- "blue"
plot(x = mtcars$mpg, 
     y = mtcars$disp, 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = colors,
     pch = 20,
     cex = 1.6)

Point size of important cars

colors <- rep("black", length(mtcars$mpg))
colors[c(14, 28)] <- "red"
cexes <- rep(1.6, length(mtcars$mpg))
cexes[c(14, 28)] <- 4
plot(x = mtcars$mpg, 
     y = mtcars$disp, 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = colors,
     pch = 20,
     cex = cexes)

Change the point style of the red elements to a filled diamond.

colors <- rep("black", length(mtcars$mpg))
colors[c(14, 28)] <- "red"
cexes <- rep(1.6, length(mtcars$mpg))
cexes[c(14, 28)] <- 4
pchs <- rep(20, length(mtcars$mpg))
pchs[c(14,28)] <- 18
plot(x = mtcars$mpg, 
     y = mtcars$disp, 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = colors,
     pch = pchs,
     cex = cexes)

Plotting in the right order

colors <- rep("black", length(mtcars$mpg))
colors[c(14, 28)] <- "red"
cexes <- rep(1.6, length(mtcars$mpg))
cexes[c(14, 28)] <- 4
pchs <- rep(20, length(mtcars$mpg))
pchs[c(14,28)] <- 18
plot(x = mtcars$mpg, 
     y = mtcars$disp, 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = colors,
     pch = pchs,
     cex = cexes)
points(x = mtcars$mpg[-c(14, 28)], 
       y = mtcars$disp[-c(14, 28)],
       pch = 20,
       col = "black",
       cex = 1.6)

Using xlim and ylim

What happens here?

plot(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)], 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information",
     col = "red",
     pch = 18,
     cex = 4)
points(x = mtcars$mpg[-c(14, 28)], 
       y = mtcars$disp[-c(14, 28)],
       pch = 20,
       col = "black",
       cex = 1.6)

plot(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)], 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information",
     col = "red",
     pch = 18,
     cex = 4,
     xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
     ylim = c(min(mtcars$disp), max(mtcars$disp)))
points(x = mtcars$mpg[-c(14, 28)], 
       y = mtcars$disp[-c(14, 28)],
       pch = 20,
       col = "black",
       cex = 1.6)

Adding transparency

Transparency can be added using the rbg function. The final argument is the level of transparency (0-1), the first three are red-green-blue values between 0 and 1.

plot(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)], 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information",
     col = rgb(1, 0, 0, 0.6),
     pch = 18,
     cex = 4,
     xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
     ylim = c(min(mtcars$disp), max(mtcars$disp)))
points(x = mtcars$mpg[-c(14, 28)], 
       y = mtcars$disp[-c(14, 28)],
       pch = 20,
       col = "black",
       cex = 1.6)

Make the red points even more transparent.

plot(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)], 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information",
     col = rgb(1, 0, 0, 0.1),
     pch = 18,
     cex = 4,
     xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
     ylim = c(min(mtcars$disp), max(mtcars$disp)))
points(x = mtcars$mpg[-c(14, 28)], 
       y = mtcars$disp[-c(14, 28)],
       pch = 20,
       col = "black",
       cex = 1.6)

Gridlines

Use grid to add grid lines. Use this when the numbers are important, not just the shape.

plot(x = mtcars$mpg, 
     y = mtcars$disp, 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = "black",
     cex = 1.6,
     pch = 20,
     bty = "l")
grid(lwd = 3, lty = 1, col = "black")

The grid line at approximately (15, 300) doesn’t look good, since it’s plotted above the points. Use the point function to fix the problem.

We must plot the points again.

plot(x = mtcars$mpg, 
     y = mtcars$disp, 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = "black",
     cex = 1.6,
     pch = 20,
     bty = "l")
grid()
points(x = mtcars$mpg, 
     y = mtcars$disp, 
     col = "black",
     cex = 1.6,
     pch = 20)

Adding lines with abline

plot(x = mtcars$mpg, 
     y = mtcars$disp, 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = "black",
     cex = 1.6,
     pch = 20,
     bty = "l",
     xlim = c(0, 40))
grid()
points(x = mtcars$mpg, 
     y = mtcars$disp, 
     col = "black",
     cex = 1.6,
     pch = 20)
abline(v = 10, lty = 1, lwd = 2, col = "red")

plot(x = mtcars$mpg, 
     y = mtcars$disp, 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = "black",
     cex = 1.6,
     pch = 20,
     bty = "l",
     ylim = c(0, 500),
     xlim = c(0, 40))
grid()
points(x = mtcars$mpg, 
     y = mtcars$disp, 
     col = "black",
     cex = 1.6,
     pch = 20)
abline(v = 10, lty = 1, lwd = 2, col = "red")
abline(h = 50, lty = 1, lwd = 2, col = "blue")

Inspect the documentation of abline and add a purple line with slope 1000 and intercept -50.

plot(x = mtcars$mpg, 
     y = mtcars$disp, 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = "black",
     cex = 1.6,
     pch = 20,
     bty = "l",
     ylim = c(0, 500),
     xlim = c(0, 40))
grid()
points(x = mtcars$mpg, 
     y = mtcars$disp, 
     col = "black",
     cex = 1.6,
     pch = 20)
abline(v = 10, lty = 1, lwd = 2, col = "red")
abline(h = 50, lty = 1, lwd = 2, col = "blue")
abline(a = 1000, b = -50, lwd = 2, col = "purple")

Adding labels

The text function adds labels to a plot.

plot(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)], 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = rgb(1, 0, 0, 0.6),
     pch = 18,
     cex = 4,
     xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
     ylim = c(min(mtcars$disp), max(mtcars$disp)))
points(x = mtcars$mpg[-c(14, 28)], 
       y = mtcars$disp[-c(14, 28)],
       pch = 20,
       col = "black",
       cex = 1.6)
text(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)],
     labels = rownames(mtcars)[c(14, 28)])

The labels overlap the points. We must fix this manually.

plot(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)], 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = rgb(1, 0, 0, 0.6),
     pch = 18,
     cex = 4,
     xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
     ylim = c(min(mtcars$disp), max(mtcars$disp)),
     bty = "l")
points(x = mtcars$mpg[-c(14, 28)], 
       y = mtcars$disp[-c(14, 28)],
       pch = 20,
       col = "black",
       cex = 1.6)
text(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)],
     labels = rownames(mtcars)[c(14, 28)],
     pos = c(1, 3),
     offset = c(1,1))

Adding a scatterplot smoother

The loess function estimates a scatterplot smoother.

model <- loess(disp ~ mpg, data = mtcars)
plot(mtcars$mpg, predict(model))

Let’s use a line plot.

plot(mtcars$mpg, predict(model), type = "l")

We must sort the miles per gallons to get a good plot.

x <- sort(mtcars$mpg)
plot(x, predict(model, x), type = "l")

x <- sort(mtcars$mpg)
plot(x, predict(model, x), type = "l", lwd = 2)

x <- sort(mtcars$mpg)
plot(x, predict(model, x), type = "l", lwd = 2, lty = 3, col = "grey")

plot(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)], 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = rgb(1, 0, 0, 0.6),
     pch = 18,
     cex = 4,
     xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
     ylim = c(min(mtcars$disp), max(mtcars$disp)),
     bty = "l")
points(x = mtcars$mpg[-c(14, 28)], 
       y = mtcars$disp[-c(14, 28)],
       pch = 20,
       col = "black",
       cex = 1.6)
text(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)],
     labels = rownames(mtcars)[c(14, 28)],
     pos = c(1, 3),
     offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 2, lty = 3, col = "grey")

The scatterplot smoother is plotted above the data points, crearting an ugly effec (see mpg ~ 30). Modify the code so the points are plotted on top of the smoother.

model <- loess(disp ~ mpg, data = mtcars)
plot(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)], 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = rgb(1, 0, 0, 0.6),
     pch = 18,
     cex = 4,
     xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
     ylim = c(min(mtcars$disp), max(mtcars$disp)),
     bty = "l")
points(x = mtcars$mpg[-c(14, 28)], 
       y = mtcars$disp[-c(14, 28)],
       pch = 20,
       col = "black",
       cex = 1.6)
text(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)],
     labels = rownames(mtcars)[c(14, 28)],
     pos = c(1, 3),
     offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 2, lty = 3, col = "grey")
legend("topright", legend = c("Scatterplot smoother", "Unimportant cars", "Important cars"), col = c("grey", "black", "red"))

Legends

model <- loess(disp ~ mpg, data = mtcars)
plot(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)], 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = rgb(1, 0, 0, 0.6),
     pch = 18,
     cex = 4,
     xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
     ylim = c(min(mtcars$disp), max(mtcars$disp)),
     bty = "l")
points(x = mtcars$mpg[-c(14, 28)], 
       y = mtcars$disp[-c(14, 28)],
       pch = 20,
       col = "black",
       cex = 1.6)
text(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)],
     labels = rownames(mtcars)[c(14, 28)],
     pos = c(1, 3),
     offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 2, lty = 3, col = "grey")
legend("topright", legend = c("Scatterplot smoother", "Unimportant cars", "Important cars"), col = c("grey", "black", "red"))

plot(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)], 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     col = rgb(1, 0, 0, 0.6),
     pch = 18,
     cex = 4,
     xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
     ylim = c(min(mtcars$disp), max(mtcars$disp)),
     bty = "l")
points(x = mtcars$mpg[-c(14, 28)], 
       y = mtcars$disp[-c(14, 28)],
       pch = 20,
       col = "black",
       cex = 1.6)
text(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)],
     labels = rownames(mtcars)[c(14, 28)],
     pos = c(1, 3),
     offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 2, lty = 3, col = "grey")
legend("topright", 
       legend = c("Scatterplot smoother", "Unimportant cars", "Important cars"),
       col = c("grey", "black", "red"), 
       lwd = c(2, NA, NA),
       pch = c(NA, 20, 16),
       lty = c(3, NA, NA))

plot(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)], 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     sub = "One gallon equals ~3.8 litres",
     col = rgb(1, 0, 0, 0.6),
     pch = 18,
     cex = 4,
     xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
     ylim = c(min(mtcars$disp), max(mtcars$disp)),
     bty = "l")
points(x = mtcars$mpg[-c(14, 28)], 
       y = mtcars$disp[-c(14, 28)],
       pch = 20,
       col = "black",
       cex = 1.6)
text(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)],
     labels = rownames(mtcars)[c(14, 28)],
     pos = c(1, 3),
     offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 2, lty = 3, col = "grey")
legend("topright", 
       legend = c("Scatterplot smoother", "Unimportant cars", "Important cars"),
       col = c("grey", "black", rgb(1, 0, 0, 0.6)), 
       lwd = c(2, NA, NA),
       pch = c(NA, 20, 18),
       lty = c(3, NA, NA),
       pt.cex = c(0, 1, 4/1.6),
       bty = "n")

Modify the code so that the “Scatterplot smoother” appears at the bottom of the legend, not the top. The important cars should be on the top.

plot(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)], 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     sub = "One gallon equals ~3.8 litres",
     col = rgb(1, 0, 0, 0.6),
     pch = 18,
     cex = 4,
     xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
     ylim = c(min(mtcars$disp), max(mtcars$disp)),
     bty = "l")
points(x = mtcars$mpg[-c(14, 28)], 
       y = mtcars$disp[-c(14, 28)],
       pch = 20,
       col = "black",
       cex = 1.6)
text(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)],
     labels = rownames(mtcars)[c(14, 28)],
     pos = c(1, 3),
     offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 2, lty = 3, col = "grey")
legend("topright", 
       legend = c("Important cars", "Unimportant cars", "Scatterplot smoother"),
       col = c(rgb(1, 0, 0, 0.6), "black", "grey"), 
       lwd = c(NA, NA, 2),
       pch = c(18, 20, NA),
       lty = c(NA, NA, 3),
       pt.cex = c(4/1.6, 1, 0),
       bty = "n")

Saving the graph

pdf(file = "cars.pdf", width = 8, height = 8) 
#
#
#
dev.off()

In practice

pdf(file = "cars.pdf", width = 8, height = 8) 

plot(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)], 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     main = "Important information concerning automobiles",
     sub = "One gallon equals ~3.8 litres",
     col = rgb(1, 0, 0, 0.6),
     pch = 18,
     cex = 4,
     xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
     ylim = c(min(mtcars$disp), max(mtcars$disp)),
     bty = "l")
points(x = mtcars$mpg[-c(14, 28)], 
       y = mtcars$disp[-c(14, 28)],
       pch = 20,
       col = "black",
       cex = 1.6)
text(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)],
     labels = rownames(mtcars)[c(14, 28)],
     pos = c(1, 3),
     offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 2, lty = 3, col = "grey")
legend("topright", 
       legend = c("Important cars", "Unimportant cars", "Scatterplot smoother"),
       col = c(rgb(1, 0, 0, 0.6), "black", "grey"), 
       lwd = c(NA, NA, 2),
       pch = c(18, 20, NA),
       lty = c(NA, NA, 3),
       pt.cex = c(4/1.6, 1, 0),
       bty = "n")

dev.off()

Modify height and width

pdf(file = "cars.pdf", width = 16, height = 8) 

plot(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)], 
     xlab = "Miles per gallon", 
     ylab = "Displacement", 
     sub = "One gallon equals ~3.8 litres",
     col = rgb(1, 0, 0, 0.6),
     pch = 18,
     cex = 4,
     xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
     ylim = c(min(mtcars$disp), max(mtcars$disp)),
     cex.lab = 1.5,
     cex.main = 2,
     bty = "l")
points(x = mtcars$mpg[-c(14, 28)], 
       y = mtcars$disp[-c(14, 28)],
       pch = 20,
       col = "black",
       cex = 1.6)
text(x = mtcars$mpg[c(14, 28)], 
     y = mtcars$disp[c(14, 28)],
     labels = rownames(mtcars)[c(14, 28)],
     pos = c(1, 3),
     offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 3, lty = 3, col = "grey")
legend("topright", 
       legend = c("Scatterplot smoother", "Unimportant cars", "Important cars"),
       col = c("grey", "black", rgb(1, 0, 0, 0.6)), 
       lwd = c(3, NA, NA),
       pch = c(NA, 20, 18),
       lty = c(3, NA, NA),
       pt.cex = c(0, 1, 4/1.6),
       cex = c(1.3,1.3,1.3),
       bty = "n")

dev.off()

Some journals do not accept pdf graphs.

  • BMP, JPEG, PNG and TIFF. Avoid these if possible, as they are lossy.
  • eps is a PostScript format that is often accepted.
  • svg is probably the preferred format.
svg("cars.svg", width = 8, height = 8) 

dev.off()

Plotting parameters

Documentation

  • xlab
  • ylab
  • main

Multiple plots

  • lines
  • points
  • add

Styling

  • color
  • type
  • lty
  • pch

Statistical plots

There are many satistical plots, including:

  • boxplot
  • barplot
  • hist

Box plots

boxplot(mtcars$hp, mtcars$disp)

boxplot(mtcars$hp, mtcars$disp, names = c("Horsepower", "Displacement"))

Change the color of hp to green and displacement to yellow.

boxplot(mtcars$hp, mtcars$disp, names = c("Horsepower", "Displacement"), col = c("green", "yellow"))

Add an informative title to the plot. Change the colors back to something pleasant.

boxplot(mtcars$hp, mtcars$disp, names = c("Horsepower", "Displacement"), col = c("white", "white"), main = "Distribution of car properties")

Make a boxplot of wt, drat, and cyl. Add labels and title.

boxplot(mtcars$wt, mtcars$drat, mtcars$cyl, names = c("Weight", "Rear axle ratio", "Cylinders"), col = "white", main = "Distribution of car properties")

Count plot

table(mtcars$cyl)

 4  6  8 
11  7 14 
barplot(table(mtcars$cyl))

ggplot2

The package ggplot2 is a better option for bar graphs and count graphs. See, e.g., this page.

Histograms

hist(mtcars$mpg)

hist(mtcars$mpg, xlab = "Miles per gallon", main = "Histogram of miles per gallon")

Change the color of the histogram to white. Make the histogram a density histogram, i.e., the integral should integrate to 1. (By default, it shows counts.)

hist(mtcars$mpg, xlab = "Miles per gallon", main = "Histogram of miles per gallon", col = "white", freq = FALSE)

The default value for breaks (the number of bins + 1) is too small by default, as R uses an outdated method for calculating the breaks. Experiment with different breaks and choose a “pleasing” one.

hist(mtcars$mpg, xlab = "Miles per gallon", main = "Histogram of miles per gallon", col = "white", freq = FALSE, breaks = 10)

Add a call to rug on mtcars$mpg below the histogram.

hist(mtcars$mpg, xlab = "Miles per gallon", main = "Histogram of miles per gallon", col = "white", freq = FALSE, breaks = 10)
rug(mtcars$mpg)

Multiple plots

We are sometimes interested in evaluating wether a dataset is approximately normal. We can do this visually (but unreliably!) by plotting a normal density curve on top of a histogram.

mu <- mean(mtcars$mpg)
sigma <- sd(mtcars$mpg)
x <- seq(min(mtcars$mpg), max(mtcars$mpg), length.out = 100)
x
  [1] 10.40000 10.63737 10.87475 11.11212 11.34949 11.58687 11.82424 12.06162
  [9] 12.29899 12.53636 12.77374 13.01111 13.24848 13.48586 13.72323 13.96061
 [17] 14.19798 14.43535 14.67273 14.91010 15.14747 15.38485 15.62222 15.85960
 [25] 16.09697 16.33434 16.57172 16.80909 17.04646 17.28384 17.52121 17.75859
 [33] 17.99596 18.23333 18.47071 18.70808 18.94545 19.18283 19.42020 19.65758
 [41] 19.89495 20.13232 20.36970 20.60707 20.84444 21.08182 21.31919 21.55657
 [49] 21.79394 22.03131 22.26869 22.50606 22.74343 22.98081 23.21818 23.45556
 [57] 23.69293 23.93030 24.16768 24.40505 24.64242 24.87980 25.11717 25.35455
 [65] 25.59192 25.82929 26.06667 26.30404 26.54141 26.77879 27.01616 27.25354
 [73] 27.49091 27.72828 27.96566 28.20303 28.44040 28.67778 28.91515 29.15253
 [81] 29.38990 29.62727 29.86465 30.10202 30.33939 30.57677 30.81414 31.05152
 [89] 31.28889 31.52626 31.76364 32.00101 32.23838 32.47576 32.71313 32.95051
 [97] 33.18788 33.42525 33.66263 33.90000
plot(x, dnorm(x, mu, sigma), type = "l")

Using lines, add the normal density plot to the histogram. Be sure to choose a nice color and line type (lty)!

hist(mtcars$mpg, xlab = "Miles per gallon", main = "Histogram of miles per gallon", col = "white", freq = FALSE, breaks = 10)
rug(mtcars$mpg)
lines(x, dnorm(x, mu, sigma), lty = 3, col = "black", lwd = 2)

The Shapiro–Wilk test is among the most widely used tests of normality. Find the Shapiro–Wilk test in R and evaluate the normality of mtcars$mpg formally.

shapiro.test(mtcars$mpg)

    Shapiro-Wilk normality test

data:  mtcars$mpg
W = 0.94756, p-value = 0.1229

We can’t formally reject normality. But it’s not very plausible in the first place.

Quantile-quantile plots

qqnorm(mtcars$mpg)

qqnorm(mtcars$mpg)
qqline(mtcars$mpg)

  1. Remove the title from the plot. (Hint: What is the R way of saying “nothing?”) (ii) Change the “box” around the plot so it has lines emanating only from the lower-left corner.
qqnorm(mtcars$mpg, main = NULL, bty = "l")
qqline(mtcars$mpg)

More plots

Plots side by side

You put multiple plots in the same window using par(mfrow())

par(mfrow = c(2,2))
hist(mtcars$mpg, xlab = "Miles per gallon", col = "white", main = NULL, freq = FALSE, breaks = 10)
rug(mtcars$mpg)
lines(x, dnorm(x, mu, sigma), lty = 3, col = "black", lwd = 2)
qqnorm(mtcars$mpg, main = NULL, bty = "l")
qqline(mtcars$mpg)
boxplot(mtcars$mpg, xlab = "Miles per gallon")
plot(mtcars$mpg, mtcars$disp, xlab = "Miles per gallon", ylab = "Displacement", pch = 20)

plot is generic

The plot function is generic. It can be applied to many different objects, with sometimes unpredictable behaviour!

properties <- datarium::properties
head(properties)
  property_type  buyer_type
1          flat single male
2          flat single male
3          flat single male
4          flat single male
5          flat single male
6          flat single male
property_table <- table(properties$property_type, properties$buyer_type)
property_table
                
                 single male single female married couple family
  flat                    40            30             16     10
  bungalow                 4             4             14     16
  detached house           8            16             26     42
  terrace                 16             7             45     39
plot(property_table)

To find the documentation of a generic, you must know the class of the object you use.

class(property_table)
[1] "table"

Instances of generic functions can be found by placing a . behind the generic method:

?plot.table

Call plot(dnorm, -5, 5). What does the plot function do now? Find its documentation.

Look up plot.function.